import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn import tree
# Load the Iris dataset bundled with scikit-learn; `iris` exposes the
# feature matrix (`data`), the class labels (`target`) and their names.
iris = load_iris()

# Show the names of the measured features (the columns of `iris.data`).
print(iris.feature_names)
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
# Show the class names that the integer labels in `iris.target` stand for.
print(iris.target_names)
['setosa' 'versicolor' 'virginica']
# Splitting the dataset: hold out one sample per class (rows 0, 50 and 100)
# and keep every other row for training.
removed = [0, 50, 100]
keep_mask = np.ones(len(iris.target), dtype=bool)
keep_mask[removed] = False
new_data = iris.data[keep_mask]
new_target = iris.target[keep_mask]

# Train classifier: fit a decision tree on the rows that were kept.
# NOTE(review): no random_state is passed, so ties between equally good
# splits may be broken differently from run to run - confirm whether
# reproducibility matters here.
clf = tree.DecisionTreeClassifier().fit(new_data, new_target)

# Classify the three held-out rows and compare against their true labels.
prediction = clf.predict(iris.data[removed])
print("Original Labels", iris.target[removed])
print("Labels Predicted", prediction)
Original Labels [0 1 2]
Labels Predicted [0 1 2]
# Draw the fitted decision tree. The call returns the list of text
# annotations (one per node), which is what gets echoed when this line is
# the last expression of an interactive cell.
tree.plot_tree(clf)
[Text(167.4, 199.32, 'X[3] <= 0.8\ngini = 0.667\nsamples = 147\nvalue = [49, 49, 49]'), Text(141.64615384615385, 163.07999999999998, 'gini = 0.0\nsamples = 49\nvalue = [49, 0, 0]'), Text(193.15384615384616, 163.07999999999998, 'X[3] <= 1.75\ngini = 0.5\nsamples = 98\nvalue = [0, 49, 49]'), Text(103.01538461538462, 126.83999999999999, 'X[2] <= 4.95\ngini = 0.171\nsamples = 53\nvalue = [0, 48, 5]'), Text(51.50769230769231, 90.6, 'X[3] <= 1.65\ngini = 0.042\nsamples = 47\nvalue = [0, 46, 1]'), Text(25.753846153846155, 54.359999999999985, 'gini = 0.0\nsamples = 46\nvalue = [0, 46, 0]'), Text(77.26153846153846, 54.359999999999985, 'gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]'), Text(154.52307692307693, 90.6, 'X[3] <= 1.55\ngini = 0.444\nsamples = 6\nvalue = [0, 2, 4]'), Text(128.76923076923077, 54.359999999999985, 'gini = 0.0\nsamples = 3\nvalue = [0, 0, 3]'), Text(180.27692307692308, 54.359999999999985, 'X[2] <= 5.45\ngini = 0.444\nsamples = 3\nvalue = [0, 2, 1]'), Text(154.52307692307693, 18.119999999999976, 'gini = 0.0\nsamples = 2\nvalue = [0, 2, 0]'), Text(206.03076923076924, 18.119999999999976, 'gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]'), Text(283.2923076923077, 126.83999999999999, 'X[2] <= 4.85\ngini = 0.043\nsamples = 45\nvalue = [0, 1, 44]'), Text(257.53846153846155, 90.6, 'X[1] <= 3.1\ngini = 0.444\nsamples = 3\nvalue = [0, 1, 2]'), Text(231.7846153846154, 54.359999999999985, 'gini = 0.0\nsamples = 2\nvalue = [0, 0, 2]'), Text(283.2923076923077, 54.359999999999985, 'gini = 0.0\nsamples = 1\nvalue = [0, 1, 0]'), Text(309.04615384615386, 90.6, 'gini = 0.0\nsamples = 42\nvalue = [0, 0, 42]')]
# Split the dataset into training and testing sets:
# 80% of the rows are kept for training and 20% for testing.
# The fixed random_state makes the shuffle (and so the split) repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=20,
)

# Display the training feature matrix.
X_train
array([[5.7, 3. , 4.2, 1.2],
[5.6, 3. , 4.1, 1.3],
[5.1, 3.5, 1.4, 0.2],
[5.1, 3.8, 1.6, 0.2],
[4.8, 3.4, 1.6, 0.2],
[4.9, 2.5, 4.5, 1.7],
[6. , 3.4, 4.5, 1.6],
[4.9, 3. , 1.4, 0.2],
[6.4, 3.2, 4.5, 1.5],
[7.4, 2.8, 6.1, 1.9],
[5.7, 2.8, 4.5, 1.3],
[6.1, 2.6, 5.6, 1.4],
[4.9, 3.6, 1.4, 0.1],
[6.7, 3.1, 4.4, 1.4],
[6.3, 3.3, 4.7, 1.6],
[4.7, 3.2, 1.3, 0.2],
[5.4, 3.9, 1.7, 0.4],
[5. , 3.3, 1.4, 0.2],
[5.7, 2.5, 5. , 2. ],
[5.1, 3.8, 1.9, 0.4],
[6.4, 2.8, 5.6, 2.2],
[6.2, 3.4, 5.4, 2.3],
[5.4, 3.4, 1.5, 0.4],
[6.3, 3.3, 6. , 2.5],
[6. , 2.2, 4. , 1. ],
[5.6, 2.9, 3.6, 1.3],
[5.6, 3. , 4.5, 1.5],
[4.5, 2.3, 1.3, 0.3],
[5.1, 3.4, 1.5, 0.2],
[5.5, 2.4, 3.8, 1.1],
[6.2, 2.9, 4.3, 1.3],
[5.8, 2.6, 4. , 1.2],
[5.6, 2.5, 3.9, 1.1],
[6.7, 3.1, 4.7, 1.5],
[6.5, 2.8, 4.6, 1.5],
[5.7, 2.9, 4.2, 1.3],
[5.1, 3.3, 1.7, 0.5],
[6.2, 2.2, 4.5, 1.5],
[6.3, 2.3, 4.4, 1.3],
[4.9, 3.1, 1.5, 0.1],
[4.4, 3. , 1.3, 0.2],
[5.4, 3. , 4.5, 1.5],
[5.5, 2.3, 4. , 1.3],
[6.1, 3. , 4.6, 1.4],
[5.1, 3.8, 1.5, 0.3],
[5.3, 3.7, 1.5, 0.2],
[6.4, 3.2, 5.3, 2.3],
[5.5, 3.5, 1.3, 0.2],
[4.8, 3.1, 1.6, 0.2],
[5. , 2. , 3.5, 1. ],
[6. , 3. , 4.8, 1.8],
[5.8, 2.7, 5.1, 1.9],
[5.6, 2.8, 4.9, 2. ],
[6.3, 2.9, 5.6, 1.8],
[6.4, 2.8, 5.6, 2.1],
[7.1, 3. , 5.9, 2.1],
[5.8, 2.7, 3.9, 1.2],
[6.5, 3. , 5.5, 1.8],
[7.7, 3. , 6.1, 2.3],
[7.2, 3.6, 6.1, 2.5],
[4.4, 2.9, 1.4, 0.2],
[6.8, 3. , 5.5, 2.1],
[5.2, 3.5, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
[5.8, 4. , 1.2, 0.2],
[6.7, 3. , 5.2, 2.3],
[6.9, 3.1, 4.9, 1.5],
[5.9, 3.2, 4.8, 1.8],
[6.9, 3.1, 5.4, 2.1],
[6.3, 2.7, 4.9, 1.8],
[5.6, 2.7, 4.2, 1.3],
[4.4, 3.2, 1.3, 0.2],
[6.1, 3. , 4.9, 1.8],
[5.5, 4.2, 1.4, 0.2],
[5. , 2.3, 3.3, 1. ],
[7.7, 3.8, 6.7, 2.2],
[4.8, 3.4, 1.9, 0.2],
[6. , 2.9, 4.5, 1.5],
[6.1, 2.9, 4.7, 1.4],
[6.2, 2.8, 4.8, 1.8],
[4.8, 3. , 1.4, 0.3],
[4.7, 3.2, 1.6, 0.2],
[7.2, 3.2, 6. , 1.8],
[6.7, 3.3, 5.7, 2.5],
[5.2, 2.7, 3.9, 1.4],
[5.5, 2.6, 4.4, 1.2],
[5.7, 2.6, 3.5, 1. ],
[6.7, 3.3, 5.7, 2.1],
[6.7, 3.1, 5.6, 2.4],
[5. , 3.5, 1.6, 0.6],
[6.9, 3.2, 5.7, 2.3],
[5.1, 3.5, 1.4, 0.3],
[6.7, 3. , 5. , 1.7],
[4.6, 3.1, 1.5, 0.2],
[5.7, 3.8, 1.7, 0.3],
[6.3, 2.5, 5. , 1.9],
[6.3, 3.4, 5.6, 2.4],
[7.3, 2.9, 6.3, 1.8],
[5.4, 3.7, 1.5, 0.2],
[5.2, 4.1, 1.5, 0.1],
[6.5, 3.2, 5.1, 2. ],
[6. , 2.7, 5.1, 1.6],
[5.9, 3. , 4.2, 1.5],
[7.9, 3.8, 6.4, 2. ],
[5. , 3. , 1.6, 0.2],
[6.9, 3.1, 5.1, 2.3],
[4.6, 3.4, 1.4, 0.3],
[5. , 3.4, 1.5, 0.2],
[5.8, 2.7, 5.1, 1.9],
[7.6, 3. , 6.6, 2.1],
[5.4, 3.9, 1.3, 0.4],
[5. , 3.4, 1.6, 0.4],
[5. , 3.5, 1.3, 0.3],
[7.7, 2.6, 6.9, 2.3],
[6.1, 2.8, 4. , 1.3],
[4.6, 3.6, 1. , 0.2],
[6.6, 3. , 4.4, 1.4],
[6.4, 3.1, 5.5, 1.8],
[5.7, 4.4, 1.5, 0.4],
[5.7, 2.8, 4.1, 1.3]])
# Display the held-out test feature matrix.
X_test
array([[4.6, 3.2, 1.4, 0.2],
[6.1, 2.8, 4.7, 1.2],
[6.4, 2.9, 4.3, 1.3],
[7.2, 3. , 5.8, 1.6],
[5.8, 2.7, 4.1, 1. ],
[5.5, 2.5, 4. , 1.3],
[6.8, 3.2, 5.9, 2.3],
[5.1, 3.7, 1.5, 0.4],
[6.7, 2.5, 5.8, 1.8],
[4.8, 3. , 1.4, 0.1],
[6.5, 3. , 5.2, 2. ],
[6.8, 2.8, 4.8, 1.4],
[6. , 2.2, 5. , 1.5],
[5. , 3.2, 1.2, 0.2],
[5.2, 3.4, 1.4, 0.2],
[7.7, 2.8, 6.7, 2. ],
[4.3, 3. , 1.1, 0.1],
[6.6, 2.9, 4.6, 1.3],
[5.8, 2.8, 5.1, 2.4],
[4.9, 2.4, 3.3, 1. ],
[7. , 3.2, 4.7, 1.4],
[5.9, 3. , 5.1, 1.8],
[6.4, 2.7, 5.3, 1.9],
[5.4, 3.4, 1.7, 0.2],
[6.3, 2.5, 4.9, 1.5],
[5.5, 2.4, 3.7, 1. ],
[5.1, 2.5, 3. , 1.1],
[4.9, 3.1, 1.5, 0.2],
[6.5, 3. , 5.8, 2.2],
[6.3, 2.8, 5.1, 1.5]])
# Refit the existing `clf` object, this time on the training split produced
# by train_test_split. `fit` returns the estimator itself, which is why the
# classifier's repr is echoed when run interactively.
clf.fit(X_train, y_train)
DecisionTreeClassifier()
# Evaluate the refitted classifier on the held-out test split; for
# scikit-learn classifiers `score` reports the mean accuracy.
clf.score(X_test, y_test)
0.9333333333333333